Evaluation of RISK survival models

This document highlights the use of

for the evaluation (RRPlot), and calibration of cox models (CoxRiskCalibration) or logistic models (CalibrationProbPoissonRisk) of survival data.

Furthermore, it can be used to evaluate any Risk index that reruns the probability of a future event on external data-set.

This document will use the survival::rotterdam, and survival::gbsg data-sets to train and predict the risk of cancer recurrence after surgery. Both Cox and Logistic models will be trained and evaluated.

Here are some sample plots returned by the evaluated functions:

The libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

Breast Cancer Royston-Altman data

data(gbsg, package=“survival”) and data(rotterdam, package=“survival”)

gbsgdata <- gbsg
rownames(gbsgdata) <- gbsgdata$pid
gbsgdata$pid <- NULL

odata <-rotterdam
rownames(odata) <- odata$pid
odata$pid <- NULL
odata$rfstime <- odata$rtime
odata$status <- odata$recur
odata$rtime <- NULL
odata$recur <- NULL

odata <- odata[,colnames(odata) %in% colnames(gbsgdata)]

odata$size <- 10*(odata$size=="<=20") + 
  35*(odata$size=="20-50") + 
  60*(odata$size==">50")

data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,odata))

data$`(Intercept)` <- NULL

dataBrestCancerTrain <- cbind(time=odata[rownames(data),"rfstime"],status=odata[rownames(data),"status"],data)

colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),":","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain)," ","")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"\\.","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"-","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),">","_")
dataBrestCancerTrain$time <- dataBrestCancerTrain$time/365 ## To years


pander::pander(table(odata[rownames(data),"status"]),caption="rotterdam")
rotterdam
0 1
1464 1518

data(gbsg, package=“survival”) data conditioning

gbsgdata <- gbsgdata[,colnames(odata)]
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,gbsgdata))

data$`(Intercept)` <- NULL

dataBrestCancerTest <- cbind(time=gbsgdata[rownames(data),"rfstime"],status=gbsgdata[rownames(data),"status"],data)

colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),":","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest)," ","")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"\\.","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"-","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),">","_")
dataBrestCancerTest$time <- dataBrestCancerTest$time/365

pander::pander(table(odata[rownames(data),"status"]), caption="gbsg")
gbsg
0 1
499 183

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataBrestCancerTrain,loops=1,NumberofRepeats = 5)

—–.

sm <- summary(ml)
pander::pander(sm$coefficients)
  Estimate lower HR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 0.000716 1.001 1.001 1.001 0.626 0.600 0.632 0.630 0.601 0.634 0.03040 0.4594 12.81 14.37 0.033056 1
size_grade 0.005649 1.005 1.006 1.006 0.598 0.623 0.632 0.599 0.626 0.634 0.01868 0.3914 9.82 11.29 0.007947 1
nodes 0.086582 1.082 1.090 1.099 0.637 0.642 0.643 0.640 0.643 0.644 0.00745 0.0564 8.33 1.66 0.000148 1
size 0.006888 1.005 1.007 1.009 0.595 0.641 0.643 0.595 0.642 0.644 0.01447 0.3587 8.05 9.97 0.001322 1
size_nodes -0.000378 1.000 1.000 1.000 0.624 0.643 0.643 0.629 0.644 0.644 0.00346 0.3430 7.25 9.57 -0.000377 1
age_size -0.000149 1.000 1.000 1.000 0.567 0.627 0.632 0.568 0.630 0.634 0.00635 0.1935 5.95 5.36 0.004078 1
grade 0.204934 1.146 1.227 1.314 0.565 0.637 0.643 0.561 0.638 0.644 0.00926 0.2069 5.88 6.31 0.005344 1
age -0.003113 0.996 0.997 0.998 0.513 0.628 0.643 0.513 0.628 0.644 0.00416 0.0917 5.27 2.51 0.015465 1
grade_nodes -0.013784 0.981 0.986 0.992 0.635 0.645 0.643 0.639 0.646 0.644 0.00207 -0.0910 5.03 -2.55 -0.002609 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

timeinterval <- 5 # Five years

h0 <- sum(dataBrestCancerTrain$status & dataBrestCancerTrain$time <= timeinterval)
h0 <- h0/sum((dataBrestCancerTrain$time > timeinterval) | (dataBrestCancerTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.429 5
index <- predict(ml,dataBrestCancerTrain)
rdata <- cbind(dataBrestCancerTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event

toinclude <- rdata[,1]==1 
obstiemToEvent <- dataBrestCancerTrain[,"time"]
tmin<-min(obstiemToEvent)
sum(toinclude)

[1] 1518

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.552 0.0112 49.4 3.39e-318
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1518 2.67 0.617 0.616
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)

3.12

The Time vs. Events are not calibrated. Lets do the calibration

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.459 0.389 0.320 0.214 0.18549 0.4996
RR 1.690 1.713 1.799 2.376 1.00000 1.7255
RR_LCI 1.586 1.603 1.666 1.869 0.00000 1.6196
RR_UCI 1.802 1.830 1.942 3.019 0.00000 1.8383
SEN 0.299 0.462 0.644 0.965 1.00000 0.2464
SPE 0.900 0.798 0.646 0.125 0.00137 0.9310
BACC 0.599 0.630 0.645 0.545 0.50068 0.5887
NetBenefit 0.110 0.172 0.246 0.374 0.39742 0.0916
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.03 0.984 1.09 0.183
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.14 1.14 1.13 1.15
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.31 1.31 1.31 1.32
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.676 0.676 0.662 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.675 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.299 0.276 0.323
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.459 0.389
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 465.079317 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1985 816 1144 93.9 385.7
class=1 396 248 177 28.0 31.8
class=2 601 454 197 336.3 391.3

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBrestCancerTrain,"status","time")


pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.711 1.37 7.17

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Time to event after calibration

timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
  Estimate Std. Error t value Pr(>|t|)
timetoEvent[toinclude] 0.637 0.0129 49.4 3.39e-318
Fitting linear model: obstiemToEvent[toinclude] ~ 0 + timetoEvent[toinclude]
Observations Residual Std. Error \(R^2\) Adjusted \(R^2\)
1518 2.67 0.617 0.616
plot(timetoEvent,obstiemToEvent,
     col=1+rdata[,1],
     xlab="Expected time",
     ylab="Observed time",
     main="Expected vs. Observed",
     xlim=c(tmin,tmax),
     ylim=c(tmin,tmax),
     log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
             pch=c(1,1,-1),
             col=c(1,2,"blue"),
             lty=c(-1,-1,1)
             )

MADerror2 <- c(MADerror2,mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude])))
pander::pander(MADerror2)

3.12 and 2.65

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6383 0.558 0.472 0.329 0.28808 0.499
RR 1.6904 1.713 1.799 2.376 1.00000 1.741
RR_LCI 1.5860 1.603 1.666 1.869 0.00000 1.620
RR_UCI 1.8018 1.830 1.942 3.019 0.00000 1.872
SEN 0.2991 0.462 0.644 0.965 1.00000 0.589
SPE 0.8996 0.798 0.646 0.125 0.00137 0.691
BACC 0.5993 0.630 0.645 0.545 0.50068 0.640
NetBenefit 0.0653 0.111 0.173 0.281 0.31069 0.149
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.896 0.852 0.942 1.44e-05
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.988 0.988 0.982 0.995
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.03 1.03 1.03 1.03
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.676 0.676 0.662 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.694 0.675 0.713
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.299 0.276 0.323
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.638 0.558
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 465.079317 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1985 816 1144 93.9 385.7
class=1 396 248 177 28.0 31.8
class=2 601 454 197 336.3 391.3

Performance on the external data set

index <- predict(ml,dataBrestCancerTest)
pp <- predictionStats_binary(cbind(dataBrestCancerTest$status,index),plotname="Breast Cancer")

par(op)


prob <- ppoisGzero(index,h0)
rdata <- cbind(dataBrestCancerTest$status,prob)
rrCoxTestAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

External Data Report

pander::pander(t(rrCoxTestAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.638 @:0.558 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6378 0.5580 0.5899 0.342 2.75e-01 0.4992
RR 1.7994 1.6427 1.7581 3.279 2.64e+01 1.5739
RR_LCI 1.5366 1.3951 1.4980 1.641 5.65e-02 1.3253
RR_UCI 2.1071 1.9343 2.0632 6.552 1.23e+04 1.8691
SEN 0.3311 0.4515 0.4181 0.977 1.00e+00 0.5619
SPE 0.8734 0.7571 0.8088 0.111 1.55e-02 0.6382
BACC 0.6022 0.6043 0.6134 0.544 5.08e-01 0.6001
NetBenefit 0.0186 0.0238 0.0271 0.166 2.25e-01 0.0415
pander::pander(t(rrCoxTestAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.26 1.12 1.41 9.48e-05
pander::pander(rrCoxTestAnalysis$c.index,caption="C. Index")
  • C Index: 0.664

  • Dxy: 0.328

  • S.D.: 0.0311

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 176738

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.664 0.665 0.633 0.694
pander::pander(t(rrCoxTestAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.66 0.619 0.7
pander::pander((rrCoxTestAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.328 0.275 0.384
pander::pander((rrCoxTestAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.873 0.836 0.905
pander::pander(t(rrCoxTestAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.638 0.558
pander::pander(rrCoxTestAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 81.471750 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 457 164 221.4 14.888 58.181
class=1 82 37 33.2 0.438 0.494
class=2 147 98 44.4 64.710 77.254

Calibrating the index on the test data

calprob <- CoxRiskCalibration(ml,dataBrestCancerTest,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.604 1.04 5.73
rdata <- cbind(dataBrestCancerTest$status,calprob$prob)

rrAnalysis <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

After Calibration Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6284 0.5247 0.5310 0.299 2.39e-01 0.5002
RR 1.7405 1.7325 1.7581 3.279 2.64e+01 1.6427
RR_LCI 1.4790 1.4751 1.4980 1.641 5.65e-02 1.3951
RR_UCI 2.0483 2.0347 2.0632 6.552 1.23e+04 1.9343
SEN 0.2676 0.4247 0.4181 0.977 1.00e+00 0.4515
SPE 0.8992 0.7984 0.8088 0.111 1.55e-02 0.7571
BACC 0.5834 0.6116 0.6134 0.544 5.08e-01 0.6043
NetBenefit 0.0205 0.0596 0.0601 0.212 2.61e-01 0.0597
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.18 1.05 1.33 0.00418
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.664

  • Dxy: 0.328

  • S.D.: 0.0311

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 176736

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.664 0.665 0.635 0.694
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.66 0.619 0.7
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.264 0.215 0.318
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.865 0.927
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.629 0.525
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 80.835092 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 482 173 232.4 15.20 69.5
class=1 86 47 32.0 7.02 7.9
class=2 118 79 34.6 57.14 65.4

Logistic Model

Here we train a logistic model on the same data set

## Only label subjects that present event withing five years

dataBrestCancerR <- subset(dataBrestCancerTrain, time>=5 | status==1)
dataBrestCancerR$status <- dataBrestCancerR$status * (dataBrestCancerR$time < 5)
dataBrestCancerR$time <- NULL

#ml <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=20,NumberofRepeats = 5)
mlog <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=1,NumberofRepeats = 5)

—–..

sm <- summary(mlog)
pander::pander(sm$coefficients)
  Estimate lower OR upper u.Accuracy r.Accuracy full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI z.NRI Delta.AUC Frequency
size_nodes 1.05e-03 1.001 1.001 1.001 0.669 0.571 0.668 0.627 0.500 0.628 0.11233 0.63654 17.86 18.870 0.128490 1
nodes 4.33e-02 1.040 1.044 1.048 0.676 0.634 0.690 0.639 0.621 0.662 0.07110 0.57106 14.13 16.179 0.040494 1
grade_nodes 1.50e-02 1.014 1.015 1.016 0.682 0.637 0.686 0.649 0.624 0.655 0.06580 0.54866 13.66 15.650 0.031087 1
age_nodes 1.06e-03 1.001 1.001 1.001 0.678 0.653 0.686 0.642 0.621 0.657 0.03346 0.21312 9.39 5.710 0.035896 1
size_grade 1.75e-03 1.001 1.002 1.002 0.632 0.682 0.686 0.626 0.646 0.655 0.01787 0.29411 6.74 7.728 0.008648 1
age_size 8.73e-05 1.000 1.000 1.000 0.608 0.682 0.686 0.577 0.649 0.657 0.01534 0.29152 6.41 7.652 0.007600 1
grade 2.27e-01 1.168 1.254 1.347 0.571 0.683 0.690 0.500 0.653 0.662 0.01340 0.19036 6.20 4.983 0.008461 1
age_meno -6.04e-03 0.992 0.994 0.996 0.571 0.676 0.686 0.500 0.645 0.657 0.00782 0.08057 4.76 2.337 0.012065 1
age_pgr -5.42e-06 1.000 1.000 1.000 0.571 0.686 0.686 0.500 0.656 0.657 0.00512 0.00745 4.11 0.194 0.000417 1
age_grade -1.65e-03 0.997 0.998 0.999 0.574 0.690 0.690 0.507 0.661 0.662 0.00454 0.11372 3.60 2.960 0.000315 1
meno_grade 1.02e-01 1.045 1.107 1.173 0.571 0.683 0.686 0.500 0.652 0.657 0.00425 0.20428 3.47 5.343 0.004441 1
nodes_hormon -1.38e-02 0.979 0.986 0.994 0.587 0.688 0.686 0.526 0.658 0.655 0.00280 0.45522 3.44 12.150 -0.002853 1
size 3.94e-03 1.002 1.004 1.006 0.611 0.693 0.690 0.618 0.663 0.662 0.00507 0.21050 3.42 5.600 -0.001075 1
meno_pgr 3.19e-04 1.000 1.000 1.001 0.571 0.687 0.686 0.500 0.657 0.657 0.00316 0.05977 3.35 1.558 -0.000429 1
pgr -1.07e-04 1.000 1.000 1.000 0.571 0.689 0.686 0.500 0.659 0.655 0.00257 0.19759 2.64 5.745 -0.004123 1
meno_nodes -2.60e-02 0.955 0.974 0.994 0.640 0.686 0.686 0.595 0.656 0.657 0.00264 -0.06329 2.59 -1.645 0.000631 1
grade_pgr -3.51e-05 1.000 1.000 1.000 0.571 0.669 0.668 0.500 0.627 0.628 0.00241 0.17471 2.55 5.058 0.001252 1
meno_size 2.34e-03 1.000 1.002 1.004 0.604 0.691 0.690 0.578 0.663 0.662 0.00185 0.10227 2.43 2.662 -0.001378 1

Logistic Model Performance

op <- par(no.readonly = TRUE)


cprob <- predict(mlog,dataBrestCancerTrain)

rdata <- cbind(dataBrestCancerTrain$status,cprob)
rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5.0)

par(op)

Training Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.394 0.255 0.130969 0.500
RR 1.765 1.739 1.799 2.213 1.000000 1.773
RR_LCI 1.659 1.627 1.676 1.764 0.000000 1.665
RR_UCI 1.879 1.858 1.931 2.777 0.000000 1.888
SEN 0.327 0.470 0.566 0.962 1.000000 0.374
SPE 0.900 0.799 0.731 0.125 0.000683 0.874
BACC 0.613 0.635 0.648 0.543 0.500342 0.624
NetBenefit 0.108 0.165 0.202 0.342 0.435125 0.129
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.817 0.776 0.859 3.78e-16
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206594

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.667 0.694
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7

Results on the validation set using Logistic model

pre <- predict(mlog,dataBrestCancerTest)
rdata <- cbind(dataBrestCancerTest$status,pre)

rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=5)

par(op)

Validation Report

pander::pander(t(rrAnalysis$keyPoints),caption="Threshold values")
Threshold values
  @:0.541 @:0.431 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.542 0.431 0.439 0.306 2.31e-01 0.4996
RR 1.792 1.702 1.756 2.678 2.20e+01 1.7318
RR_LCI 1.529 1.428 1.477 1.679 4.75e-02 1.4731
RR_UCI 2.100 2.029 2.088 4.271 1.02e+04 2.0360
SEN 0.395 0.595 0.579 0.950 1.00e+00 0.4482
SPE 0.832 0.638 0.669 0.181 1.29e-02 0.7804
BACC 0.613 0.617 0.624 0.565 5.06e-01 0.6143
NetBenefit 0.060 0.105 0.106 0.210 2.69e-01 0.0717
pander::pander(t(rrAnalysis$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.03 0.921 1.16 0.556
pander::pander(rrAnalysis$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.638 0.699
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.541 0.431
pander::pander(rrAnalysis$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Logistic Model Poisson Calibration

riskdata <- cbind(dataBrestCancerTrain$status,predict(mlog,dataBrestCancerTrain,type="prob"),dataBrestCancerTrain$time)
calprob <- CalibrationProbPoissonRisk(riskdata)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Logistic Calibration Parameters")
h0 Gain DeltaTime
0.689 1.33 7.44
timeinterval <- calprob$timeInterval;
gain <- calprob$hazardGain

rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBrestCancerTrain$time,
                     title="Cal. Logistic Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated logistic: training

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6463 0.528 0.486 0.324 0.170289 0.500
RR 1.7654 1.739 1.799 2.213 1.000000 1.764
RR_LCI 1.6587 1.627 1.676 1.764 0.000000 1.648
RR_UCI 1.8790 1.858 1.931 2.777 0.000000 1.889
SEN 0.3267 0.470 0.566 0.962 1.000000 0.519
SPE 0.8996 0.799 0.731 0.125 0.000683 0.765
BACC 0.6132 0.635 0.648 0.543 0.500342 0.642
NetBenefit 0.0763 0.129 0.163 0.284 0.408374 0.149
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.914 0.868 0.961 0.000392
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
  • C Index: 0.68

  • Dxy: 0.36

  • S.D.: 0.014

  • n: 2982

  • missing: 0

  • uncensored: 1518

  • Relevant Pairs: 6184528

  • Concordant: 4206593

  • Uncertain: 2703838

  • cstatCI:

    mean.C Index median lower upper
    0.68 0.68 0.667 0.693
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.695 0.677 0.714
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.327 0.303 0.351
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.9 0.883 0.915
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.645 0.528
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 541.976716 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 1974 804 1144 100.9 415.3
class=1 365 218 170 13.4 15.1
class=2 643 496 204 418.2 490.7
probLog <- predict(mlog,dataBrestCancerTest)
aprob <- adjustProb(probLog,gain)

rdata <- cbind(dataBrestCancerTest$status,aprob)
rrAnalysisTestLogistic <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
                     timetoEvent=dataBrestCancerTest$time,
                     title="Cal. Logistic Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

par(op)

Report of the calibrated validation

pander::pander(t(rrAnalysisTestLogistic$keyPoints),caption="Threshold values")
Threshold values
  @:0.645 @:0.528 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.645672 0.5277 0.5360 0.385 2.94e-01 0.5001
RR 1.791927 1.7024 1.7562 2.678 2.20e+01 1.7232
RR_LCI 1.529135 1.4283 1.4771 1.679 4.75e-02 1.4313
RR_UCI 2.099881 2.0290 2.0880 4.271 1.02e+04 2.0746
SEN 0.394649 0.5953 0.5786 0.950 1.00e+00 0.6555
SPE 0.832041 0.6382 0.6693 0.181 1.29e-02 0.5762
BACC 0.613345 0.6168 0.6239 0.565 5.06e-01 0.6159
NetBenefit -0.000601 0.0315 0.0367 0.125 2.04e-01 0.0466
pander::pander(t(rrAnalysisTestLogistic$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.16 1.03 1.3 0.0128
pander::pander(rrAnalysisTestLogistic$c.index,caption="C. Index")
  • C Index: 0.669

  • Dxy: 0.338

  • S.D.: 0.0309

  • n: 686

  • missing: 0

  • uncensored: 299

  • Relevant Pairs: 266144

  • Concordant: 178115

  • Uncertain: 203702

  • cstatCI:

    mean.C Index median lower upper
    0.669 0.669 0.64 0.698
pander::pander(t(rrAnalysisTestLogistic$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.669 0.628 0.709
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.395 0.339 0.453
pander::pander((rrAnalysisTestLogistic$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.832 0.791 0.868
pander::pander(t(rrAnalysisTestLogistic$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.645 0.528
pander::pander(rrAnalysisTestLogistic$surdif,caption="Logrank test")
Logrank test Chisq = 92.507991 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 369 121 181.7 20.2997 52.3868
class=1 134 60 61.7 0.0479 0.0604
class=2 183 118 55.5 70.2342 88.0195

Comparing the COX and Logistic Models on the Independent Data

pander::pander(t(rrCoxTestAnalysis$OAcum95ci))
mean 50% 2.5% 97.5%
1.01 1.01 1 1.01
pander::pander(t(rrAnalysisTestLogistic$OAcum95ci))
mean 50% 2.5% 97.5%
0.944 0.944 0.94 0.947
pander::pander(t(rrCoxTestAnalysis$OE95ci))
mean 50% 2.5% 97.5%
1.07 1.07 1.04 1.1
pander::pander(t(rrAnalysisTestLogistic$OE95ci))
mean 50% 2.5% 97.5%
0.981 0.982 0.956 1.01
maxobs <- sum(dataBrestCancerTest$status)

par(mfrow=c(1,2),cex=0.75)

plot(rrCoxTestAnalysis$CumulativeOvs[,1:2],type="l",lty=1,
     main="Cumulative Probability",
     xlab="Observed",
     ylab="Cumulative Probability",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$CumulativeOvs[,1:2],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)


plot(rrCoxTestAnalysis$CumulativeOvs$Observed,
     rrCoxTestAnalysis$CumulativeOvs$Cumulative-
       rrCoxTestAnalysis$CumulativeOvs$Observed,
     main="Cumulative Risk Difference",
     xlab="Observed",
     ylab="Cumulative Risk - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$CumulativeOvs$Observed,
     rrAnalysisTestLogistic$CumulativeOvs$Cumulative-
       rrAnalysisTestLogistic$CumulativeOvs$Observed,
     lty=2,
     col="red")
legend("topleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

plot(rrCoxTestAnalysis$OEData[,2:3],type="l",lty=1,
     main="Expected over Time",
     xlab="Observed",
     ylab="Expected",
     ylim=c(0,maxobs),
     xlim=c(0,maxobs))
lines(rrAnalysisTestLogistic$OEData[,2:3],lty=2,col="red")
lines(x=c(0,maxobs),y=c(0,maxobs),lty=3,col="gray")
legend("topleft",legend = c("Cox","Logistic","Ideal"),
       col=c("black","red","gray"),
       lty=c(1,2,3),
       cex=0.75
)

plot(rrCoxTestAnalysis$OEData$Observed,
     rrCoxTestAnalysis$OEData$Expected-
       rrCoxTestAnalysis$OEData$Observed,
     main="Expected vs Observed Difference",
     xlab="Observed",
     ylab="Cumulative - Observed",
     type="l",
     lty=1)
lines(rrAnalysisTestLogistic$OEData$Observed,
     rrAnalysisTestLogistic$OEData$Expected-
       rrAnalysisTestLogistic$OEData$Observed,
     lty=2,col="red")

legend("bottomleft",legend = c("Cox","Logistic"),
       col=c("black","red"),
       lty=c(1,2),
       cex=0.75
)

par(op)